NTHREADS=20
analysis_tag <- '04_velocyto_batch'
library(pheatmap)
library(knitr)
library(scater)
library(Seurat)
library(Cairo)
library(edgeR)
library(data.table)
## library(biomaRt)
library(dplyr)
## library(reshape2)
#library(iSEE)
## library(Matrix)
## library(scran)
## library(mvoutlier)
## library(shiny)
## library(kableExtra)
## library(velocyto.R)
## library(cowplot)
## library(corrplot)
library(DT)
library(devtools)
## library(viridis)
## library("CellMixS")
library(gplots)
library(pheatmap)
library(velocyto.R)
ac <- function(col, alpha = 1) {
apply(sapply(col, col2rgb)/255, 2, function(x) rgb(x[1],
x[2], x[3], alpha = alpha))
}
Provided by Jaeger
so <- readRDS(file.path("..", "data", "Seurat_object_Gli_Ascl_combi_meta.Robj"))
Plus velocyto looms (from task 02_velocyto_mapping)
looms <- list()
for (fn in list.files(file.path("..", "data", "looms"), pattern = ".*loom",
recursive = TRUE)) {
ldat <- read.loom.matrices(file.path("..", "data", "looms",
fn))
ldat <- lapply(ldat, function(x) {
colnames(x) <- gsub("Aligned.sortedByCoord.out.bam",
"", colnames(x))
x
})
ldat <- lapply(ldat, function(x) {
colnames(x) <- gsub(".*:", "", colnames(x))
x
})
looms[[dirname(fn)]] <- ldat
}
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
To be filtered out from velocyto calculations
alt_batch <- as.character(so@meta.data$batch)
alt_batch[alt_batch == "Ascl1_12wk_1"] <- "B1"
alt_batch[alt_batch == "Ascl1_12wk_2"] <- "B2"
alt_batch[alt_batch == "Ascl1_5day_1"] <- "B1"
alt_batch[alt_batch == "Ascl1_5day_2"] <- "B2"
alt_batch[alt_batch == "Gli1_12wk"] <- "B1"
alt_batch[alt_batch == "Gli1_5day_1"] <- "B1"
alt_batch[alt_batch == "Gli1_5day_2"] <- "B1"
alt_batch <- as.factor(alt_batch)
table(alt_batch)
alt_batch
B1 B2
642 413
so@meta.data$alt_batch <- alt_batch
DefaultAssay(so) <- "RNA"
Idents(so) <- "alt_batch"
markers <- FindMarkers(so, ident.1 = "B1", ident.2 = "B2", test.use = "LR",
latent.vars = "combi", min.pct = 0.2)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
markers <- markers[order(markers$p_val_adj), ]
markers <- markers[markers$p_val_adj < 0.05, ]
markers$analysis <- "batch_relevant_lr"
markers <- data.frame(gene = rownames(markers), markers)
DT::datatable(markers %>% as.data.frame() %>% dplyr::mutate_if(is.numeric,
funs(round(., 2))), extensions = c("Buttons", "FixedColumns"),
rownames = FALSE, options = list(dom = "Bfrtip", scrollX = TRUE,
fixedColumns = list(leftColumns = 1), buttons = c("csv",
"excel")))
Removal of batch-affected genes (simple set differences between count tables and the significantly batch-affected genes)
## selected <- setdiff(markers$gene, VariableFeatures(so))
for (loom in names(looms)) {
for (item in names(looms[[loom]])) {
curr <- setdiff(rownames(looms[[loom]][[item]]), markers$gene)
looms[[loom]][[item]] <- looms[[loom]][[item]][curr,
]
}
}
Pasting loom files together (assumes equivalent gene sorting)
ldat <- list()
ldat$spliced <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$spliced)))
ldat$unspliced <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$unspliced)))
ldat$spanning <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$spanning)))
for (item in names(ldat)) {
colnames(ldat[[item]]) <- sapply(strsplit(colnames(ldat[[item]]),
".", fixed = TRUE), function(x) return(x[2]))
}
Making sure we don’t have the batch-affected genes.
stopifnot(length(intersect(as.character(markers$gene), rownames(ldat$spliced))) ==
0)
Remove cells that are not in use (Seurat’s object contains the QC-ed cells only).
for (item in names(ldat)) {
ldat[[item]] <- ldat[[item]][, intersect(colnames(ldat[[item]]),
colnames(so))]
}
Coloring cells according to the combi Jaeger’s naming that includes the genotype and the cell cluster
cell_colors <- so@meta.data["combi"]
Distances as in Seurat’s PCA
cell_dist <- as.dist(1 - armaCor(t(Embeddings(so, reduction = "pca"))))
# exonic read (spliced) expression matrix
emat <- ldat$spliced
# intronic read (unspliced) expression matrix
nmat <- ldat$unspliced
# spanning read (intron+exon) expression matrix
smat <- ldat$spanning
summary(apply(emat, 1, mean))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 0.000 0.000 2.753 0.102 4857.934
summary(apply(nmat, 1, mean))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.0000 0.0000 0.5442 0.0154 1339.1415
summary(apply(smat, 1, mean))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.00000 0.00000 0.11440 0.00103 270.48615
table(rownames(cell_colors) %in% colnames(emat))
FALSE TRUE
80 975
## cell_colors <- data.frame(cell_colors[rownames(cell_colors)
## %in% colnames(emat),], row.names =
## rownames(cell_colors)[rownames(cell_colors) %in%
## colnames(emat)])
cell_colors$cell <- rownames(cell_colors)
cell_colors$color <- rainbow(8)[as.numeric(as.factor(cell_colors$combi))]
cnames <- cell_colors$cell
cell_colors <- cell_colors$color
names(cell_colors) <- cnames
Please mind and/or finetune the min max cluster average values
## filter expression matrices based on some minimum
## max-cluster averages
emat <- filter.genes.by.cluster.expression(emat, cell_colors,
min.max.cluster.average = 0.2)
nmat <- filter.genes.by.cluster.expression(nmat, cell_colors,
min.max.cluster.average = 0.05)
smat <- filter.genes.by.cluster.expression(smat, cell_colors,
min.max.cluster.average = 0.01)
# look at the resulting gene set
length(intersect(rownames(emat), rownames(nmat)))
[1] 11003
Estimating velocities, note the k parameter
set.seed(2)
fit.quantile <- 0.02
rvel.qf <- gene.relative.velocity.estimates(as.matrix(emat),
as.matrix(nmat), deltaT = 1, kCells = 25, cell.dist = cell_dist,
n.cores = NTHREADS, fit.quantile = fit.quantile)
Warning in labels(cell.dist) == colnames(emat): longer object length is not
a multiple of shorter object length
matching cells between cell.dist and emat/nmat ... done
calculating cell knn ... done
calculating convolved matrices ... done
fitting gamma coefficients ... done. succesfful fit for 11003 genes
filtered out 2747 out of 11003 genes due to low nmat-emat correlation
filtered out 2698 out of 8256 genes due to low nmat-emat slope
calculating RNA velocity shift ... done
calculating extrapolated cell state ... done
pca.velocity.plot(rvel.qf, nPcs = 5, plot.cols = 2, cell.colors = ac(cell_colors,
alpha = 0.7), cex = 1.2, pcount = 0.1, pc.multipliers = c(1,
-1, -1, -1, -1))
log ... pca ... pc multipliers ... delta norm ... done
done
Notice the number of neighbors (n= 100 etc)
umap_coord <- Embeddings(so, reduction = "umap")
show.velocity.on.embedding.cor(umap_coord, rvel.qf, n = 100,
scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5),
cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = TRUE, cell.border.alpha = 0.1)
delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
umap_coord_ascl = umap_coord[grep("Ascl", rownames(umap_coord)),
]
umap_coord_gli = umap_coord[!rownames(umap_coord) %in% rownames(umap_coord_ascl),
]
umap_coord_A_5d_12w_2 = umap_coord[grep("Ascl1_12wk_2|II", rownames(umap_coord)),
]
Ascl
## Velocity for Ascl and Gli only
show.velocity.on.embedding.cor(umap_coord_ascl, rvel.qf, n = 100,
scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5),
cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)
delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
A_5d_12w_2
show.velocity.on.embedding.cor(umap_coord_A_5d_12w_2, rvel.qf,
n = 100, scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5),
cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)
delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
Gli
show.velocity.on.embedding.cor(umap_coord_gli, rvel.qf, n = 100,
scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5),
cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)
delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
Shall we increase the number of neighbors while plotting? Example with 300 neighbors.
show.velocity.on.embedding.cor(umap_coord, rvel.qf, n = 300,
scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5),
cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = TRUE, cell.border.alpha = 0.1)
delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
save.image("04_velocyto_batch_removal.RData")
date()
[1] "Wed Aug 28 13:14:16 2019"
devtools::session_info()
Session info -------------------------------------------------------------
setting value
version R version 3.5.1 (2018-07-02)
system x86_64, linux-gnu
ui X11
language en_CA:en
collate en_CA.UTF-8
tz Europe/Zurich
date 2019-08-28
Packages -----------------------------------------------------------------
package * version date
ape 5.2 2018-09-24
assertthat 0.2.0 2017-04-11
backports 1.1.2 2017-12-13
base * 3.5.1 2018-07-09
beeswarm 0.2.3 2016-04-25
bindr 0.1.1 2018-03-13
bindrcpp * 0.2.2 2018-03-29
Biobase * 2.40.0 2018-07-09
BiocGenerics * 0.26.0 2018-07-09
BiocParallel * 1.14.1 2018-07-09
bit 1.1-14 2018-05-29
bit64 0.9-7 2017-05-08
bitops 1.0-6 2013-08-17
Cairo * 1.5-9 2015-09-26
caTools 1.17.1.1 2018-07-20
cluster 2.0.7-1 2018-04-13
codetools 0.2-15 2016-10-05
colorspace 1.3-2 2016-12-14
compiler 3.5.1 2018-07-09
cowplot 0.9.4 2019-01-08
crayon 1.3.4 2017-09-16
crosstalk 1.0.0 2016-12-21
data.table * 1.11.4 2018-05-27
datasets * 3.5.1 2018-07-09
DelayedArray * 0.6.1 2018-06-15
DelayedMatrixStats 1.2.0 2018-10-19
devtools * 1.13.6 2018-06-27
digest 0.6.18 2018-10-10
dplyr * 0.7.8 2018-11-10
DT * 0.4 2018-01-30
edgeR * 3.22.3 2018-06-21
evaluate 0.10.1 2017-06-24
fitdistrplus 1.0-11 2018-09-10
formatR 1.5 2017-04-25
future 1.14.0 2019-07-02
future.apply 1.3.0 2019-06-18
gdata 2.18.0 2017-06-06
GenomeInfoDb * 1.16.0 2018-07-09
GenomeInfoDbData 1.1.0 2018-07-09
GenomicRanges * 1.32.7 2018-09-20
ggbeeswarm 0.6.0 2017-08-07
ggplot2 * 3.1.0 2018-10-25
ggrepel 0.8.0 2018-05-09
ggridges 0.5.1 2018-09-27
globals 0.12.4 2018-10-11
glue 1.3.0 2018-07-17
gplots * 3.0.1 2016-03-30
graphics * 3.5.1 2018-07-09
grDevices * 3.5.1 2018-07-09
grid 3.5.1 2018-07-09
gridExtra 2.3 2017-09-09
gtable 0.2.0 2016-02-26
gtools 3.8.1 2018-06-26
hdf5r 1.2.0 2019-04-20
htmltools 0.3.6 2017-04-28
htmlwidgets 1.3 2018-09-30
httpuv 1.4.4.2 2018-07-02
httr 1.3.1 2017-08-20
ica 1.0-2 2018-05-24
igraph 1.2.4.1 2019-04-22
IRanges * 2.14.12 2018-09-20
irlba 2.3.2 2018-01-11
jsonlite 1.5 2017-06-01
KernSmooth 2.23-15 2015-06-29
knitr * 1.22 2019-03-08
later 0.7.3 2018-06-08
lattice 0.20-35 2017-03-25
lazyeval 0.2.1 2017-10-29
leiden 0.3.1 2019-07-23
limma * 3.36.2 2018-06-21
listenv 0.7.0 2018-01-21
lmtest 0.9-36 2018-04-04
locfit 1.5-9.1 2013-04-20
lsei 1.2-0 2017-10-23
magrittr 1.5 2014-11-22
MASS 7.3-50 2018-04-30
Matrix * 1.2-14 2018-04-09
matrixStats * 0.54.0 2018-07-23
memoise 1.1.0 2017-04-21
metap 0.9 2018-04-25
methods * 3.5.1 2018-07-09
mgcv 1.8-24 2018-06-23
mime 0.5 2016-07-07
munsell 0.5.0 2018-06-12
nlme 3.1-137 2018-04-07
npsurv 0.4-0 2017-10-14
parallel * 3.5.1 2018-07-09
pbapply 1.3-4 2018-01-10
pcaMethods 1.72.0 2018-07-13
pheatmap * 1.0.12 2019-01-04
pillar 1.3.1 2018-12-15
pkgconfig 2.0.2 2018-08-16
plotly 4.8.0 2018-07-20
plyr 1.8.4 2016-06-08
png 0.1-7 2013-12-03
promises 1.0.1 2018-04-13
purrr 0.3.0 2019-01-27
R.methodsS3 1.7.1 2016-02-16
R.oo 1.22.0 2018-04-22
R.utils 2.7.0 2018-08-27
R6 2.4.0 2019-02-14
RANN 2.6 2018-07-16
RColorBrewer 1.1-2 2014-12-07
Rcpp 1.0.2 2019-07-25
RcppAnnoy 0.0.12 2019-05-12
RcppParallel 4.4.3 2019-05-22
RCurl 1.95-4.10 2018-01-04
reshape2 1.4.3 2017-12-11
reticulate 1.10 2018-08-05
rhdf5 2.24.0 2018-10-19
Rhdf5lib 1.2.1 2018-10-19
rjson 0.2.20 2018-06-08
rlang 0.3.1 2019-01-08
rmarkdown 1.10 2018-06-11
ROCR 1.0-7 2015-03-26
rprojroot 1.3-2 2018-01-03
rstudioapi 0.7 2017-09-07
rsvd 1.0.2 2019-07-29
Rtsne 0.13 2017-04-14
S4Vectors * 0.18.3 2018-07-09
scales 1.0.0 2018-08-09
scater * 1.8.4 2018-08-13
sctransform 0.2.0 2019-04-12
SDMTools 1.1-221 2014-08-05
Seurat * 3.1.0 2019-08-20
shiny 1.1.0 2018-05-17
shinydashboard 0.7.1 2018-10-17
SingleCellExperiment * 1.2.0 2018-10-19
splines 3.5.1 2018-07-09
stats * 3.5.1 2018-07-09
stats4 * 3.5.1 2018-07-09
stringi 1.2.3 2018-06-12
stringr 1.4.0 2019-02-10
SummarizedExperiment * 1.10.1 2018-07-09
survival 2.42-4 2018-06-30
tibble 2.0.1 2019-01-12
tidyr 0.8.2 2018-10-28
tidyselect 0.2.5 2018-10-11
tools 3.5.1 2018-07-09
tsne 0.1-3 2016-07-15
tximport 1.8.0 2018-10-19
utils * 3.5.1 2018-07-09
uwot 0.1.3 2019-04-07
velocyto.R * 0.6 2019-08-21
vipor 0.4.5 2017-03-22
viridis 0.5.1 2018-03-29
viridisLite 0.3.0 2018-02-01
withr 2.1.2 2018-03-15
xfun 0.3 2018-07-06
xtable 1.8-3 2018-08-29
XVector 0.20.0 2018-07-09
yaml 2.1.19 2018-05-01
zlibbioc 1.26.0 2018-07-09
zoo 1.8-4 2018-09-19
source
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
local
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
local
cran (@0.9.4)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
local
Bioconductor
Bioconductor
CRAN (R 3.5.1)
cran (@0.6.18)
cran (@0.7.8)
CRAN (R 3.5.1)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
Bioconductor
cran (@1.32.7)
CRAN (R 3.5.1)
cran (@3.1.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
cran (@1.3.0)
CRAN (R 3.5.1)
local
local
local
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
cran (@1.2.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
cran (@1.2.4.1)
cran (@2.14.12)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
local
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
local
CRAN (R 3.5.1)
Bioconductor
cran (@1.0.12)
cran (@1.3.1)
cran (@2.0.2)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
cran (@0.3.0)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
cran (@2.4.0)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
cran (@1.0.2)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
Bioconductor
CRAN (R 3.5.0)
cran (@0.3.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
cran (@1.0.0)
Bioconductor
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
Bioconductor
local
local
local
CRAN (R 3.5.0)
CRAN (R 3.5.1)
Bioconductor
CRAN (R 3.5.1)
cran (@2.0.1)
cran (@0.8.2)
cran (@0.2.5)
local
CRAN (R 3.5.1)
Bioconductor
local
CRAN (R 3.5.1)
Github (velocyto-team/velocyto.R@d779034)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.1)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.1)
Bioconductor
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.1)